Standard genomes are located in /mnt/brick1/ref
.
It's a good idea to symlink to the appropriate genome and/or index files from your project's ref
directory.
The following assumes the following tree for the project:
:--Project
|-ref
|-data
|-sample01
|-sample02
...
|-results
|-sample01
|-sample02
...
|-notebooks<you're working from here>
In [ ]:
num_samples=6
index="../ref/MG1655"
sampleid="sample"
aligner=$(which bowtie2)
In [ ]:
base_dir="../data"
for i in $(seq 1 $num_samples)
do
sample_dir="$base_dir/${sampleid}${i}"
result_dir="../results/${sampleid}${i}"
if [ ! -d "$result_dir" ]; then
echo "Creating $result_dir ..."
mkdir -p $result_dir
fi
read1=$sample_dir/read1.fifo
read2=$sample_dir/read2.fifo
mkfifo $read1
mkfifo $read2
zcat $sample_dir/R1.fastq.gz > $read1 &
zcat $sample_dir/R2.fastq.gz > $read2 &
# Align using bowtie2
$aligner -p 30 -x $index \
-1 $read1 -2 $read2 \
| samtools view -bhS - > "$result_dir/${sampleid}${i}.bam"
rm $sample_dir/*.fifo
done
In [ ]:
base_dir="../data"
for i in $(seq 1 $num_samples)
do
result_dir="../results/${sampleid}${i}"
samtools sort "$result_dir/${sampleid}${i}.bam" \
-@ 30 \
-o "$result_dir/${sampleid}${i}_sorted.bam"
done
In [ ]:
# index .bam files
for i in $(seq 1 $num_samples)
do
result_dir="../results/${sampleid}${i}"
samtools index -@ 30 "$result_dir/${sampleid}${i}_sorted.bam"
done